import os


def read_txt_words(file):
    """
    Read stop words from a UTF-8 text file that holds one word per line.

    Only the line terminator is removed from each line; any other whitespace
    in a line is kept as-is. Empty lines are skipped.

    :param file: path of the text file to read.
    :return: list of the non-empty lines, in file order.
    """
    with open(file, 'r', encoding='utf8') as f:
        # Strip the newline explicitly instead of slicing off the last
        # character: the final line of a file may have no trailing newline,
        # and slicing would then cut a real character off the word.
        words = [line.rstrip('\n') for line in f]

    return [word for word in words if word]


def get_stop_words(folds_path='./data/chp4/stop_words'):
    """
    Load the English and Chinese stop words from a directory of word files.

    Every regular file directly inside ``folds_path`` is read with
    ``read_txt_words``. A file whose lower-cased name contains ``'en'`` but
    not ``'cn'`` is treated as English; every other file is treated as
    Chinese.

    :param folds_path: directory holding the stop-word files. The default is
        a relative path, so it is resolved against the current working
        directory.
    :return: tuple ``(en_words, cn_words)``; each is a de-duplicated list
        sorted so the result is the same on every run.
    """
    en_words = set()
    cn_words = set()
    for file in os.listdir(folds_path):
        file_all = os.path.join(folds_path, file)
        # os.listdir also returns sub-directories, which cannot be opened
        # as text files; skip anything that is not a regular file.
        if not os.path.isfile(file_all):
            continue
        words = read_txt_words(file_all)
        name = file.lower()
        if 'en' in name and 'cn' not in name:
            en_words.update(words)
        else:
            cn_words.update(words)
    # Set iteration order varies between interpreter runs; sort for a
    # reproducible result.
    return sorted(en_words), sorted(cn_words)

